package dianshiju

import us.codecraft.webmagic.Page
import us.codecraft.webmagic.Site
import us.codecraft.webmagic.processor.PageProcessor

/**
 * WebMagic [PageProcessor] that handles two page shapes:
 *
 * - pages containing a `.box-series` element: every title link found in the
 *   panel items is queued, together with the link of the last pager entry;
 * - any other page: if a `title-wrap` div is present, the `name` and `tags`
 *   fields are extracted from it.
 *
 * NOTE(review): the selectors (including the `vaule_main` spelling) are kept
 * exactly as written; presumably they mirror the target site's markup — confirm.
 */
class YoukuPageProcessor : PageProcessor {

    /**
     * Builds the crawl configuration: 3 retries, 3 cycle retries, a sleep time
     * of 1000, a timeout of 300000 and a retry sleep time of 10000
     * (presumably milliseconds — confirm against the WebMagic `Site` docs).
     */
    override fun getSite(): Site =
        Site.me()
            .setRetryTimes(3)
            .setSleepTime(1000)
            .setCycleRetryTimes(3)
            .setTimeOut(300_000)
            .setRetrySleepTime(10_000)

    /**
     * Prints the URL of [page], then either queues follow-up requests (pages
     * with a `.box-series` element) or extracts the `name` / `tags` fields.
     */
    override fun process(page: Page) {
        println(page.request.url)

        val document = page.html
        val seriesBox = document.`$`(".s-body .yk-content  .vaule_main .box-series")

        if (!seriesBox.match()) {
            // No series box: try to read the title block instead.
            val titleWrap = document.xpath("//div[@class='title-wrap']")
            if (titleWrap.match()) {
                val showName = titleWrap.xpath("//h1/span[1]/a/text()")
                page.putField("name", showName.get())
                val tagNames = titleWrap.xpath("//div/span[3]/a/text()").all()
                page.putField("tags", tagNames)
            }
            return
        }

        // Queue the title link(s) of every item in the series panel.
        val panelItems = document.`$`(".s-body .yk-content .vaule_main .box-series .panel li")
        for (item in panelItems.nodes()) {
            val titleAnchor = item.xpath("//ul[@class='info-list']/li[@class='title']/a") ?: continue
            val detailUrls = titleAnchor.links().all()
            page.addTargetRequests(detailUrls)
        }

        // Queue whatever the last pager entry links to.
        val lastPagerAnchor = document.`$`(".s-body .yk-content .vaule_main .yk-pager .yk-pages li:last-child a")
        val pagerUrls = lastPagerAnchor.links().all()
        println(pagerUrls)
        page.addTargetRequests(pagerUrls)
    }
}